Brian M. Schilder, Bioinformatician II
# Import functions
root = "./"
source(file.path(root,"general_functions.R"))
import_parameters(params)## **** __Utilized Cores__ **** = 2$subsetGenes
## [1] "protein_coding"
##
## $subsetCells
## [1] 500
##
## $resolution
## [1] 0.6
##
## $resultsPath
## [1] "./Results"
##
## $nCores
## [1] 2
##
## $perplexity
## [1] 30
# load(file.path(resultsPath, "scRNAseq_results.RData"))
# resultsPath <- "Results/subsetGenes-protein_coding__subsetCells-F__Resolution-0.2__perplexity-40__nCores-4"
# load(file.path(resultsPath,"scRNAseq_results.RData"))
library(Seurat)
library(cowplot)
library(ggplot2)
library(dplyr)
library(data.table)
library(readxl)
library(reshape2)
library(ggrepel)
library(plotly)
library(GeneOverlap) # BiocManager::install("GeneOverlap")
library(enrichR) #BiocManager::install("enrichR")
## Upgrade to alpha (development) version of Monocle, as this version has been optimized for large datasets
if("DDRTree" %in% rownames(installed.packages())==F){
devtools::install_github("cole-trapnell-lab/DDRTree", ref="simple-ppt-like")
}
if("graph" %in% rownames(installed.packages())==F){
devtools::install_github("cole-trapnell-lab/L1-graph")
}
if("reticulate" %in% rownames(installed.packages())==F){
install.packages("reticulate")
}
if("flexclust" %in% rownames(installed.packages())==F){
install.packages("flexclust")
}
if("mcclust" %in% rownames(installed.packages())==F){
install.packages("mcclust")
}
library(flexclust)
library(mcclust)
library(reticulate)
if(py_module_available("umap-learn")==F){
reticulate::py_install('umap-learn')# , pip = T, pip_ignore_installed = T # Ensure the latest version of UMAP is installed
}## virtualenv: ~/.virtualenvs/r-reticulate
## Installing packages ...
##
## Installation complete.
if(py_module_available("louvain")==F){
reticulate::py_install("louvain")
system("pip install louvain --user")
}
if("monocle" %in% rownames(installed.packages())==F){
devtools::install_github("cole-trapnell-lab/monocle-release", ref="monocle3_alpha")
}
if("ggrastr" %in% rownames(installed.packages())==F){
devtools::install_github("VPetukhov/ggrastr")
}
library(monocle) # BiocManager::install("monocle")
# BiocManager::install(c('DelayedArray', 'DelayedMatrixStats', 'org.Hs.eg.db', 'org.Mm.eg.db'))
library(org.Hs.eg.db)
library(garnett) # devtools::install_github("cole-trapnell-lab/garnett")
#
if(getwd()=="/Users/schilder/Desktop/PD_scRNAseq"){
allGenes <- F
}else{allGenes <- T}
allGenes## [1] FALSE
sessioninfo::session_info()## ─ Session info ──────────────────────────────────────────────────────────
## setting value
## version R version 3.5.1 (2018-07-02)
## os macOS 10.14.2
## system x86_64, darwin15.6.0
## ui X11
## language (EN)
## collate en_US.UTF-8
## ctype en_US.UTF-8
## tz America/New_York
## date 2019-03-01
##
## ─ Packages ──────────────────────────────────────────────────────────────
## package * version date lib
## acepack 1.4.1 2016-10-29 [2]
## AnnotationDbi * 1.44.0 2018-10-30 [2]
## ape 5.2 2018-09-24 [2]
## assertthat 0.2.0 2017-04-11 [2]
## backports 1.1.3 2018-12-14 [2]
## base64enc 0.1-3 2015-07-28 [2]
## bibtex 0.4.2 2017-06-30 [2]
## Biobase * 2.42.0 2018-10-30 [2]
## BiocGenerics * 0.28.0 2018-10-30 [2]
## BiocParallel * 1.16.6 2019-02-10 [2]
## bit 1.1-14 2018-05-29 [2]
## bit64 0.9-7 2017-05-08 [2]
## bitops 1.0-6 2013-08-17 [2]
## blob 1.1.1 2018-03-25 [2]
## boot 1.3-20 2017-08-06 [2]
## caTools 1.17.1.1 2018-07-20 [2]
## cellranger 1.1.0 2016-07-27 [2]
## checkmate 1.9.1 2019-01-15 [2]
## class 7.3-15 2019-01-01 [2]
## classInt 0.3-1 2018-12-18 [1]
## cli 1.0.1 2018-09-25 [2]
## cluster 2.0.7-1 2018-04-13 [2]
## coda 0.19-2 2018-10-08 [2]
## codetools 0.2-16 2018-12-24 [2]
## colorspace 1.4-0 2019-01-13 [2]
## cowplot * 0.9.4 2019-01-08 [2]
## crayon 1.3.4 2017-09-16 [2]
## crosstalk 1.0.0 2016-12-21 [2]
## data.table * 1.12.0 2019-01-13 [2]
## DBI 1.0.0 2018-05-02 [2]
## DDRTree * 0.1.5 2019-02-22 [1]
## DelayedArray * 0.8.0 2018-10-30 [2]
## deldir 0.1-16 2019-01-04 [1]
## densityClust 0.3 2017-10-24 [2]
## DEoptimR 1.0-8 2016-11-19 [2]
## digest 0.6.18 2018-10-10 [2]
## diptest 0.75-7 2016-12-05 [2]
## docopt 0.6.1 2018-10-11 [2]
## doParallel 1.0.14 2018-09-24 [2]
## doSNOW 1.0.16 2017-12-13 [2]
## dplyr * 0.8.0.1 2019-02-15 [2]
## dtw 1.20-1 2018-05-18 [2]
## e1071 1.7-0.1 2019-01-21 [1]
## enrichR * 1.0 2017-04-02 [2]
## evaluate 0.13 2019-02-12 [2]
## expm 0.999-3 2018-09-22 [2]
## fastICA 1.2-1 2017-06-12 [2]
## fitdistrplus 1.0-14 2019-01-23 [2]
## flexclust * 1.4-0 2018-09-24 [1]
## flexmix 2.3-15 2019-02-18 [2]
## FNN 1.1.3 2019-02-15 [2]
## foreach 1.4.4 2017-12-12 [2]
## foreign 0.8-71 2018-07-20 [2]
## Formula 1.2-3 2018-05-03 [2]
## fpc 2.1-11.1 2018-07-20 [2]
## future 1.11.1.1 2019-01-26 [2]
## garnett * 0.1.3 2019-02-15 [2]
## gbRd 0.4-11 2012-10-01 [2]
## gdata 2.18.0 2017-06-06 [2]
## GeneOverlap * 1.18.0 2018-10-30 [1]
## ggplot2 * 3.1.0 2018-10-25 [2]
## ggrepel * 0.8.0 2018-05-09 [2]
## ggridges 0.5.1 2018-09-27 [2]
## glmnet 2.0-16 2018-04-02 [2]
## globals 0.12.4 2018-10-11 [2]
## glue 1.3.0 2018-07-17 [2]
## gmodels 2.18.1 2018-06-25 [1]
## gplots 3.0.1.1 2019-01-27 [2]
## gridExtra 2.3 2017-09-09 [2]
## gtable 0.2.0 2016-02-26 [2]
## gtools 3.8.1 2018-06-26 [2]
## hdf5r 1.0.1 2018-10-07 [2]
## Hmisc 4.2-0 2019-01-26 [2]
## HSMMSingleCell 1.2.0 2018-11-01 [2]
## htmlTable 1.13.1 2019-01-07 [2]
## htmltools 0.3.6 2017-04-28 [2]
## htmlwidgets 1.3 2018-09-30 [2]
## httpuv 1.4.5.1 2018-12-18 [2]
## httr 1.4.0 2018-12-11 [2]
## ica 1.0-2 2018-05-24 [2]
## igraph * 1.2.4 2019-02-13 [2]
## IRanges * 2.16.0 2018-10-30 [2]
## irlba * 2.3.3 2019-02-05 [2]
## iterators 1.0.10 2018-07-13 [2]
## jsonlite 1.6 2018-12-07 [2]
## kernlab 0.9-27 2018-08-10 [2]
## KernSmooth 2.23-15 2015-06-29 [2]
## knitr 1.21 2018-12-10 [2]
## L1Graph * 0.1.1 2019-02-22 [1]
## lars 1.2 2013-04-24 [2]
## later 0.8.0 2019-02-11 [2]
## lattice * 0.20-38 2018-11-04 [2]
## latticeExtra 0.6-28 2016-02-09 [2]
## lazyeval 0.2.1 2017-10-29 [2]
## LearnBayes 2.15.1 2018-03-18 [1]
## limma 3.38.3 2018-12-02 [2]
## listenv 0.7.0 2018-01-21 [2]
## lmtest 0.9-36 2018-04-04 [2]
## lpSolve * 5.6.13 2015-09-19 [2]
## lpSolveAPI * 5.5.2.0-17 2016-01-13 [1]
## lsei 1.2-0 2017-10-23 [2]
## magrittr 1.5 2014-11-22 [2]
## manipulateWidget 0.10.0 2018-06-11 [2]
## MASS 7.3-51.1 2018-11-01 [2]
## Matrix * 1.2-15 2018-11-01 [2]
## matrixStats * 0.54.0 2018-07-23 [2]
## mcclust * 1.0 2012-07-23 [1]
## mclust 5.4.2 2018-11-17 [2]
## memoise 1.1.0 2017-04-21 [2]
## metap 1.1 2019-02-06 [2]
## mime 0.6 2018-10-05 [2]
## miniUI 0.1.1.1 2018-05-18 [2]
## mixtools 1.1.0 2017-03-10 [2]
## modeltools * 0.2-22 2018-07-16 [2]
## monocle * 2.99.3 2019-02-22 [1]
## munsell 0.5.0 2018-06-12 [2]
## mvtnorm 1.0-8 2018-05-31 [2]
## nlme 3.1-137 2018-04-07 [2]
## nnet 7.3-12 2016-02-02 [2]
## npsurv 0.4-0 2017-10-14 [2]
## org.Hs.eg.db * 3.7.0 2019-02-15 [2]
## pbapply 1.4-0 2019-02-05 [2]
## pbmcapply 1.3.1 2019-01-14 [1]
## pheatmap 1.0.12 2019-01-04 [2]
## pillar 1.3.1 2018-12-15 [2]
## pkgconfig 2.0.2 2018-08-16 [2]
## plotly * 4.8.0 2018-07-20 [2]
## plyr 1.8.4 2016-06-08 [2]
## png 0.1-7 2013-12-03 [2]
## prabclus 2.2-7 2019-01-17 [2]
## promises 1.0.1 2018-04-13 [2]
## proxy 0.4-22 2018-04-08 [2]
## purrr 0.3.0 2019-01-27 [2]
## qlcMatrix 0.9.7 2018-04-20 [2]
## R.methodsS3 1.7.1 2016-02-16 [2]
## R.oo 1.22.0 2018-04-22 [2]
## R.utils 2.8.0 2019-02-14 [2]
## R6 2.4.0 2019-02-14 [2]
## RANN 2.6.1 2019-01-08 [2]
## RColorBrewer 1.1-2 2014-12-07 [2]
## Rcpp 1.0.0 2018-11-07 [2]
## Rdpack 0.10-1 2018-10-04 [2]
## readxl * 1.3.0 2019-02-15 [2]
## reshape2 * 1.4.3 2017-12-11 [2]
## reticulate * 1.10 2018-08-05 [2]
## rgl 0.99.16 2018-03-28 [2]
## rjson 0.2.20 2018-06-08 [2]
## rlang 0.3.1 2019-01-08 [2]
## rmarkdown 1.11 2018-12-08 [2]
## robustbase 0.93-3 2018-09-21 [2]
## ROCR 1.0-7 2015-03-26 [2]
## rpart 4.1-13 2018-02-23 [2]
## RSQLite 2.1.1 2018-05-06 [2]
## rstudioapi 0.9.0 2019-01-09 [2]
## Rtsne 0.15 2018-11-10 [2]
## S4Vectors * 0.20.1 2018-11-09 [2]
## scales 1.0.0 2018-08-09 [2]
## SDMTools 1.1-221 2014-08-05 [2]
## segmented 0.5-3.0 2017-11-30 [2]
## sessioninfo 1.1.1 2018-11-05 [2]
## Seurat * 2.3.4 2018-07-17 [2]
## sf 0.7-3 2019-02-21 [1]
## shiny 1.2.0 2018-11-02 [2]
## slam 0.1-44 2018-12-21 [2]
## snow 0.4-3 2018-09-14 [2]
## sp 1.3-1 2018-06-05 [1]
## sparsesvd 0.1-4 2018-02-15 [2]
## spData 0.3.0 2019-01-07 [1]
## spdep 1.0-2 2019-02-13 [1]
## stringi 1.3.1 2019-02-13 [2]
## stringr 1.4.0 2019-02-10 [2]
## survival 2.43-3 2018-11-26 [2]
## tibble 2.0.1 2019-01-12 [2]
## tidyr 0.8.2 2018-10-28 [2]
## tidyselect 0.2.5 2018-10-11 [2]
## trimcluster 0.1-2.1 2018-07-20 [2]
## tsne 0.1-3 2016-07-15 [2]
## units 0.6-2 2018-12-05 [1]
## VGAM 1.1-1 2019-02-18 [1]
## viridis 0.5.1 2018-03-29 [2]
## viridisLite 0.3.0 2018-02-01 [2]
## webshot 0.5.1 2018-09-28 [2]
## withr 2.1.2 2018-03-15 [2]
## xfun 0.5 2019-02-20 [1]
## xtable 1.8-3 2018-08-29 [2]
## yaml 2.2.0 2018-07-25 [2]
## zoo 1.8-4 2018-09-19 [2]
## source
## CRAN (R 3.5.0)
## Bioconductor
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## Bioconductor
## Bioconductor
## Bioconductor
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.1)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.2)
## CRAN (R 3.5.2)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.1)
## CRAN (R 3.5.0)
## CRAN (R 3.5.2)
## CRAN (R 3.5.2)
## CRAN (R 3.5.2)
## CRAN (R 3.5.1)
## CRAN (R 3.5.0)
## CRAN (R 3.5.2)
## CRAN (R 3.5.0)
## Github (cole-trapnell-lab/DDRTree@43f3dc3)
## Bioconductor
## CRAN (R 3.5.2)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.2)
## CRAN (R 3.5.0)
## CRAN (R 3.5.2)
## CRAN (R 3.5.0)
## CRAN (R 3.5.2)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.2)
## CRAN (R 3.5.0)
## CRAN (R 3.5.1)
## CRAN (R 3.5.2)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.2)
## Github (cole-trapnell-lab/garnett@d04a89b)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## Bioconductor
## Github (cran/ggplot2@13ca86f)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.2)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.2)
## Bioconductor
## CRAN (R 3.5.1)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.2)
## Bioconductor
## CRAN (R 3.5.2)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.1)
## CRAN (R 3.5.1)
## Github (cole-trapnell-lab/L1-graph@c2a3f26)
## CRAN (R 3.5.0)
## CRAN (R 3.5.1)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## Bioconductor
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.2)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## Github (cole-trapnell-lab/monocle-release@4d37597)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.1)
## CRAN (R 3.5.1)
## CRAN (R 3.5.0)
## Bioconductor
## CRAN (R 3.5.1)
## CRAN (R 3.5.2)
## CRAN (R 3.5.2)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.1)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.2)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.2)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.1)
## CRAN (R 3.5.1)
## CRAN (R 3.5.2)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.2)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.2)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.1)
## CRAN (R 3.5.0)
## CRAN (R 3.5.1)
## CRAN (R 3.5.0)
## Bioconductor
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.2)
## CRAN (R 3.5.1)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.2)
## CRAN (R 3.5.2)
## CRAN (R 3.5.1)
## CRAN (R 3.5.2)
## CRAN (R 3.5.0)
## CRAN (R 3.5.2)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.2)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.2)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
## CRAN (R 3.5.0)
##
## [1] /Users/schilder/Library/R/3.5/library
## [2] /Library/Frameworks/R.framework/Versions/3.5/Resources/library
# Import Seurat obj
load(file.path(root, "Data/seurat_object_add_HTO_ids.Rdata"))
DAT <- seurat.obj
rm(seurat.obj)
# Add metadata
metadata <- read.table(file.path(root,"Data/meta.data4.tsv"))
DAT <- AddMetaData(object = DAT, metadata = metadata)
protDAT <- subsetBiotypes(DAT, subsetGenes = "protein_coding")## Subsetting genes: protein_coding
## 14827 / 24914 genes are protein_coding
protDAT <- remove_nonmatched_metadata(protDAT, subsetCells = F)
protDAT <- FindVariableGenes(object = protDAT, mean.function = ExpMean,
dispersion.function = LogVMR,
selection.method ="dispersion", do.plot = T,
top.genes = 2000)## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, :
## Binning grid too coarse for current (small) bandwidth: consider increasing
## 'gridsize'
# Pass TRUE if you want to see progress output on some of Monocle 3's operations
DelayedArray:::set_verbose_block_processing(TRUE)## [1] FALSE
# Passing a higher value will make some computations faster but use more memory. Adjust with caution!
options(DelayedArray.block.size=1000e6)
## Construct CDS object manually
# mDAT <- newCellDataSet(cellData = DAT@scale.data,
# featureData = AnnotatedDataFrame( data.frame(gene_short_name=row.names(DAT@scale.data),
# row.names = row.names(DAT@scale.data))
# ),
# phenoData = AnnotatedDataFrame(DAT@meta.data))
# mDAT <- estimateSizeFactors(mDAT) #DESeq2?
# mDAT <- preprocessCDS(mDAT, method = "PCA", num_dim = 10, norm_method = "none")
## Construct CDS object automaticaly
### NOTE!: importCDS takes only the raw.data (not scale.data)
mDAT <- monocle::importCDS(protDAT, import_all = T)
mDAT <- estimateSizeFactors(mDAT) #DESeq2?
mDAT <- estimateDispersions(mDAT)## Processing block 1/23 ... OK
## Processing block 2/23 ... OK
## Processing block 3/23 ... OK
## Processing block 4/23 ... OK
## Processing block 5/23 ... OK
## Processing block 6/23 ... OK
## Processing block 7/23 ... OK
## Processing block 8/23 ... OK
## Processing block 9/23 ... OK
## Processing block 10/23 ... OK
## Processing block 11/23 ... OK
## Processing block 12/23 ... OK
## Processing block 13/23 ... OK
## Processing block 14/23 ... OK
## Processing block 15/23 ... OK
## Processing block 16/23 ... OK
## Processing block 17/23 ... OK
## Processing block 18/23 ... OK
## Processing block 19/23 ... OK
## Processing block 20/23 ... OK
## Processing block 21/23 ... OK
## Processing block 22/23 ... OK
## Processing block 23/23 ... OK
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning: glm.fit: algorithm did not converge
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning: glm.fit: algorithm did not converge
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning: glm.fit: algorithm did not converge
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm stopped at boundary value
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning: glm.fit: algorithm did not converge
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning: glm.fit: algorithm did not converge
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning: glm.fit: algorithm did not converge
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning: glm.fit: algorithm did not converge
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning: glm.fit: algorithm did not converge
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning: glm.fit: algorithm did not converge
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning: glm.fit: algorithm did not converge
## Warning in parametricDispersionFit(disp_table, verbose): Dispersion fit did
## not converge.
## Removing 203 outliers
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning: glm.fit: algorithm did not converge
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning: glm.fit: algorithm did not converge
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning: glm.fit: algorithm did not converge
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning: glm.fit: algorithm did not converge
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning: glm.fit: algorithm did not converge
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm stopped at boundary value
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning: glm.fit: algorithm did not converge
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning: glm.fit: algorithm did not converge
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning: glm.fit: algorithm did not converge
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning: glm.fit: algorithm did not converge
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning in log(ifelse(y == 0, 1, y/mu)): NaNs produced
## Warning: step size truncated due to divergence
## Warning: glm.fit: algorithm did not converge
## Warning in parametricDispersionFit(disp_table[row.names(disp_table) %in% :
## Dispersion fit did not converge.
# Regressing out ID will also regress out mutation status
mDAT <- preprocessCDS(mDAT, num_dim = 20, residualModelFormulaStr = "~ nUMI + percent.mito") ## Warning in if (method == "PCA") {: the condition has length > 1 and only
## the first element will be used
## Processing block 1/4 ... OK
## Processing block 2/4 ... OK
## Processing block 3/4 ... OK
## Processing block 4/4 ... OK
# !Important! (allows replicability)
set.seed(1) # Monocle sets their seed to 2016 by default (Seurat sets it to 1)
# Options: c("UMAP", "tSNE", "DDRTree", "ICA", "none")
## UMAP
# monocle:::UMAP( )
mDAT <- reduceDimension(mDAT, reduction_method = 'UMAP',
max_components = 3,
metric="cosine",
n_threads = parallel::detectCores())
## t-SNE
# mDAT <- reduceDimension(mDAT, reduction_method = 'tSNE',
# perplexity = 30,
# scaling = F,
# num_threads = parallel::detectCores())
pretty_colors <- function(mDAT, var="Cluster", palette="custom"){
unique_var <- unique(as.character(pData(mDAT)[var][,1]))
if(palette=="custom"){
col_vector_origin <- c("mediumorchid","deepskyblue","limegreen","steelblue",
"hotpink","turquoise", "blueviolet","mediumvioletred",
"#db83da","#53c35d","#a546bb","#718fe8","#a469e6",
"#babb3d","#4f66dc","#e68821","#83b837","#d6ac3e",
"#7957b4","#468e36","#d347ae","#5dbf8c","#e53e76",
"#42c9b8","#dd454a","#3bbac6","#d5542c","#59aadc",
"#cf8b36","#4a61b0","#8b8927","#a24e99","#9cb36a",
"#ca3e87","#36815b","#b23c4e","#5c702c","#b79add",
"#a55620","#5076af","#e38f67","#85609c","#caa569",
"#9b466c","#88692c","#dd81a9","#a35545","#e08083",
"#17becf","#9edae5")
} else{col_vector_origin <- RColorBrewer::brewer.pal(length(unique_var),palette)}
# barplot(1:length(col_vector_origin) , col=col_vector_origin)
col_vector <- col_vector_origin[1:length(unique_var)]
names(col_vector) <- unique_var
return(col_vector)
} plotDR <- function(resDAT, metavar="Cluster", title=""){
options(repr.plot.width = 11)
options(repr.plot.height = 8)
col_vector <- pretty_colors(resDAT, var=metavar)
p <- plot_cell_clusters(resDAT,
color_by = metavar,
cell_size = 0.8,
show_group_id = T) +
scale_color_manual(values = col_vector) +
theme(legend.text=element_text(size=6)) +
theme(legend.position="right")+
labs(title = paste(title))
print(p)
}
test_hyperparameters <- function(mDAT, resolutions=seq(0, 1e-4,length.out=6),
Ks=seq(10,60,length.out=6),
iter_var="k"){
if(iter_var=="resolution"){
for(res in resolutions){
res_title <- paste("Resolution =",res)
cat("\n")
cat("###",res_title,"\n")
try({
resDAT <- clusterCells(mDAT, res=res,# k=43,
method = "louvain",# densityPeak
louvain_iter = 1,
verbose = F,
clustering_genes = clustering_genes,
cores = parallel::detectCores())
plotDR(resDAT, title=res_title)
})
cat("\n")
}
}else{
for(k in Ks){
k_title <- paste("K =",k)
cat("\n")
cat("###",k_title,"\n")
try({
kDAT <- clusterCells(mDAT, res=8.888889e-05, k=k,
method = "louvain",# densityPeak
louvain_iter = 1,
verbose = F,
clustering_genes = clustering_genes,
cores = parallel::detectCores())
plotDR(kDAT, title=k_title)
})
cat("\n")
}
}
}
test_hyperparameters(mDAT, iter_var="resolution")## Loading required package: ggrastr
# test_hyperparameters(mDAT, iter_var="k")
# Select final resolution
## Use ONLY the 1000 most variable genes to do clustering (otherwise there's too much noise)
clustering_genes <- intersect(mDAT@featureData@data$gene_short_name, protDAT@var.genes)[1:1000]
louvain_res <- 1e-04 # 1e-04 is the default
mDAT <- clusterCells(mDAT, res=louvain_res,
method = "louvain",
louvain_iter = 1,
clustering_genes = clustering_genes,
verbose = T, cores = parallel::detectCores())## Run kNN based graph clustering starts:
## -Input data of 19144 rows and 3 columns
## -k is set to 20
Finding nearest neighbors…DONE ~ 0.124 s Compute jaccard coefficient between nearest-neighbor sets …DONE ~ 0.008 s Build undirected graph from the weighted links …DONE ~ 0.121 s Run louvain clustering on the graph … Running louvain iteration 1 …
## Current iteration is 1; current resolution is 1e-04; Modularity is 0.246722458537894; Number of clusters are 6
## Maximal modularity is 0.246722458537894; corresponding resolution is 1e-04
##
## Run kNN based graph clustering DONE, totally takes 3.21154999732971 s.
-Number of clusters: 6
# mDAT <- clusterCells(mDAT, num_clusters = 4,
# method = "densityPeak",
# verbose = F,
# cores = parallel::detectCores())
plotDR(mDAT, "Cluster")plotDR(mDAT, "dx")plotDR(mDAT, "mut")# CLUSTERING FROM SEURAT
# plotDR(mDAT, "post_clustering")# generate size factors for normalization later
# Get pre-trained PBMC classifer
load(file.path(root, "Data/Garnett/hsPBMC")) # Download from: https://cole-trapnell-lab.github.io/garnett/classifiers/hsPBMC
mDAT <- garnett::classify_cells(mDAT,
classifier = hsPBMC,
db = org.Hs.eg.db,
cluster_extend = TRUE,
cds_gene_id_type = "SYMBOL")## Processing block 1/22 ... OK
## Processing block 2/22 ... OK
## Processing block 3/22 ... OK
## Processing block 4/22 ... OK
## Processing block 5/22 ... OK
## Processing block 6/22 ... OK
## Processing block 7/22 ... OK
## Processing block 8/22 ... OK
## Processing block 9/22 ... OK
## Processing block 10/22 ... OK
## Processing block 11/22 ... OK
## Processing block 12/22 ... OK
## Processing block 13/22 ... OK
## Processing block 14/22 ... OK
## Processing block 15/22 ... OK
## Processing block 16/22 ... OK
## Processing block 17/22 ... OK
## Processing block 18/22 ... OK
## Processing block 19/22 ... OK
## Processing block 20/22 ... OK
## Processing block 21/22 ... OK
## Processing block 22/22 ... OK
## Warning in doTryCatch(return(expr), name, parentenv, handler): The
## following genes used in the classifier are not present in the input CDS.
## Interpret with caution. ENSG00000174059The following genes used in the
## classifier are not present in the input CDS. Interpret with caution.
## ENSG00000007062The following genes used in the classifier are not present
## in the input CDS. Interpret with caution. ENSG00000157404The following
## genes used in the classifier are not present in the input CDS. Interpret
## with caution. ENSG00000185291
table(pData(mDAT)$cell_type)##
## B cells CD34+ CD4 T cells CD8 T cells
## 13 92 3 1
## Dendritic cells Monocytes NK cells T cells
## 282 15029 804 17
## Unknown
## 2903
cell_summary <- table(pData(mDAT)$cluster_ext_type)
cell_summary##
## B cells CD34+ CD4 T cells CD8 T cells
## 12 13 3 1
## Dendritic cells Monocytes NK cells T cells
## 144 17435 686 9
## Unknown
## 841
# Get feature genes for each cell type
feature_genes <- garnett::get_feature_genes(classifier = hsPBMC,
node = "root",
db = org.Hs.eg.db,
convert_ids = F)
head(feature_genes) ## B cells CD34+ Dendritic cells Monocytes
## (Intercept) -0.884989922 0.47694427 -0.124629734 -0.0301907756
## ENSG00000196154 -0.005499815 -0.01088859 -0.007978714 0.0056365451
## ENSG00000019582 0.030146634 -0.01959215 0.015232191 0.0001883507
## ENSG00000156738 2.193353690 -0.66191294 -1.104899464 -0.7393680112
## ENSG00000177455 2.056641938 -0.71851810 -1.941859710 -1.0448682467
## ENSG00000105369 1.800296145 -0.53835614 0.063380804 -0.1973728377
## NK cells T cells Unknown
## (Intercept) -0.911657164 0.524757370 0.949765956
## ENSG00000196154 -0.001627233 -0.005835858 0.026193667
## ENSG00000019582 -0.005880096 -0.023473024 0.003378099
## ENSG00000156738 -0.096566144 -0.602417257 1.011810124
## ENSG00000177455 0.297279508 0.177790907 1.173533704
## ENSG00000105369 -0.618567693 -0.963141845 0.453761563
# If a cell type is called less than n times, re-categorize it as unknown
mDAT$cluster_ext_type_filt <- ifelse(mDAT$cluster_ext_type %in% names(cell_summary[cell_summary<50]),
"Unknown", mDAT$cluster_ext_type)
plot_cell_clusters(mDAT, color_by ="cell_type", cell_size = .8) + facet_wrap(~dx)plot_cell_clusters(mDAT, color_by ="cluster_ext_type", cell_size = .8) plot_cell_clusters(mDAT, color_by ="cluster_ext_type_filt", cell_size = .8) plot_cell_clusters(mDAT, markers = c("CD14","FCGR3A"), cell_size = 0.8)## Warning: Using alpha for a discrete variable is not advised.
plot_cell_clusters(mDAT, markers = c("LRRK2","GBA"), cell_size = 0.8)## Warning: Using alpha for a discrete variable is not advised.
plot_cell_clusters(mDAT, markers = c("MS4A4A","MS4A6A"), cell_size = 0.8) ## Warning: Using alpha for a discrete variable is not advised.
plot_markers_cluster(mDAT, markers = protDAT@var.genes[1:50], minimal_cluster_fraction = 0.05)# subsetCDS(mDAT, )
mDAT <- partitionCells(mDAT)
mDAT <- learnGraph(mDAT, RGE_method = 'SimplePPT',
n_threads = parallel::detectCores())## Warning: Deprecated, use tibble::rownames_to_column() instead.
## Warning: Deprecated, use tibble::rownames_to_column() instead.
## Warning: Deprecated, use tibble::rownames_to_column() instead.
plot_cell_trajectory(mDAT, color_by = "Cluster", cell_size = .01) plot_cell_trajectory(mDAT, color_by = "dx", cell_size = .01) plot_cell_trajectory(mDAT, color_by = "mut", cell_size = .01) plot_cell_trajectory(mDAT, color_by = "cluster_ext_type_filt", cell_size = .01) # dir.create(file.path(resultsPath, "pseudotime"), showWarnings = F)
# plot_3d_cell_trajectory(mDAT,
# color_by="cell_type",
# webGL_filename= file.path(resultsPath, "pseudotime/pseudotime_cellType.html"),
# show_backbone=TRUE,
# useNULL_GLdev=TRUE)
# plot_3d_cell_trajectory(mDAT, markers = c('FCGR3A'),
# webGL_filename=file.path(resultsPath, "pseudotime/pseudotime_FCGR3A.html"),
# show_backbone=TRUE,
# useNULL_GLdev=TRUE)mDAT_sub <- mDAT[protDAT@var.genes[1:50], mDAT@phenoData$Cluster %in% c(1,2)]
# At ~1min/100 genes, this DGE method will take ~2.5hours to run for 14827 genes
DEG_df <- differentialGeneTest(mDAT_sub, verbose = T,
fullModelFormulaStr = "~Cluster",
cores = parallel::detectCores())
# Plot
mDAT_sub_genes <- mDAT_sub[row.names(DEG_df)[1:4],]
plot_genes_jitter(mDAT_sub_genes, grouping = "Cluster", ncol= 4, color_by = "dx", plot_trend = T)CDS_to_Seurat <- function(cds, export_PC=F, export_UMAP=F, export_tSNE=F){
# sum(colnames(mDAT) != colnames( mDAT@reducedDimS))
## Manually save reduced dimensions
if(export_PC==T){
mDAT$PC1 <- mDAT@normalized_data_projection[,1]
mDAT$PC2 <- mDAT@normalized_data_projection[,2]
mDAT$PC3 <- mDAT@normalized_data_projection[,3]
}
if(export_UMAP==T){
mDAT$UMAP1 <- mDAT@reducedDimA[1,]
mDAT$UMAP2 <- mDAT@reducedDimA[2,]
mDAT$UMAP3 <- mDAT@reducedDimA[3,]
}
DAT <- exportCDS(mDAT, export_to = "Seurat", export_all = T)
DAT@scale.data <- DAT@data #Data was already scaled in Monocle
# DAT <- Seurat::AddMetaData(DAT, pData(mDAT)[c("garnett_cluster","cell_type","cluster_ext_type")])
return(DAT)
}
DAT <- CDS_to_Seurat(mDAT, export_PC = T, export_UMAP = T)## Scaling data matrix
head(DAT@meta.data) ## nGene nUMI orig.ident singlet.or.not.binary HTO
## GTCATTTTCCGCATCT 2035 6729 RAJ_13357 1 NYUMD0011
## ATAAGAGAGGAGTTGC 1914 6718 RAJ_13357 1 BID0076
## CATTATCCATGGTCTA 1971 6156 RAJ_13357 1 MSMD0067
## CTTAGGAGTGGCGAAT 1893 6392 RAJ_13357 1 NYUMD0011
## CATCAGAAGCACCGCT 1967 6110 RAJ_13357 1 MSMD0067
## GCGGGTTAGTAGCCGA 1868 5977 RAJ_13357 1 NYUMD0011
## ID percent.mito res.2 res.1 res.0.6 res.0.3
## GTCATTTTCCGCATCT MSMD0207 0.04191439 0 1 0 0
## ATAAGAGAGGAGTTGC BIMD0076 0.04362066 0 1 0 0
## CATTATCCATGGTCTA NYUMD0015 0.03086921 0 1 0 0
## CTTAGGAGTGGCGAAT MSMD0207 0.03613892 0 1 0 0
## CATCAGAAGCACCGCT NYUMD0015 0.04437531 19 10 8 0
## GCGGGTTAGTAGCCGA MSMD0207 0.03514056 6 2 0 0
## barcode dx mut Ethnicity Gender Age
## GTCATTTTCCGCATCT GTCATTTTCCGCATCT PD PD White M 71
## ATAAGAGAGGAGTTGC ATAAGAGAGGAGTTGC PD GBA White M 47
## CATTATCCATGGTCTA CATTATCCATGGTCTA control control White M 51
## CTTAGGAGTGGCGAAT CTTAGGAGTGGCGAAT PD PD White M 71
## CATCAGAAGCACCGCT CATCAGAAGCACCGCT control control White M 51
## GCGGGTTAGTAGCCGA GCGGGTTAGTAGCCGA PD PD White M 71
## Size_Factor louvain_component Cluster garnett_cluster
## GTCATTTTCCGCATCT 5.117674 1 1 5
## ATAAGAGAGGAGTTGC 5.109379 1 1 5
## CATTATCCATGGTCTA 4.661479 1 1 1
## CTTAGGAGTGGCGAAT 4.844786 1 1 1
## CATCAGAAGCACCGCT 4.610054 1 1 7
## GCGGGTTAGTAGCCGA 4.508861 1 1 7
## cell_type cluster_ext_type cluster_ext_type_filt PC1
## GTCATTTTCCGCATCT Monocytes Monocytes Monocytes 21.74363
## ATAAGAGAGGAGTTGC Monocytes Monocytes Monocytes 20.51278
## CATTATCCATGGTCTA Monocytes Monocytes Monocytes 20.73024
## CTTAGGAGTGGCGAAT Monocytes Monocytes Monocytes 21.48480
## CATCAGAAGCACCGCT Monocytes Monocytes Monocytes 18.94471
## GCGGGTTAGTAGCCGA Monocytes Monocytes Monocytes 17.77910
## PC2 PC3 UMAP1 UMAP2 UMAP3
## GTCATTTTCCGCATCT 0.72274985 -2.813874 1.365752 1.513548 1.161469
## ATAAGAGAGGAGTTGC 1.02458096 -3.651205 1.354717 1.488010 1.172568
## CATTATCCATGGTCTA -0.03820978 -2.694958 1.351607 1.519749 1.158323
## CTTAGGAGTGGCGAAT -0.42949109 -1.943783 1.369097 1.527969 1.158835
## CATCAGAAGCACCGCT 4.19589928 -4.857566 1.305207 1.470326 1.143383
## GCGGGTTAGTAGCCGA 2.61114660 -7.713996 1.303913 1.455874 1.130939
markerList <- c("CD14", "FCGR3A")
markerData <- DAT@scale.data[row.names(DAT@scale.data) %in% markerList,] %>% t() %>%
as.matrix() %>% data.table(keep.rownames = T, key = "rn")
markerData[markerData$markerList[1]==0,] <- NA
markerData[markerData$markerList[2]==0,] <- NA
## Efficiently merge using data.table
# dt1 <- data.table(markerData, keep.rownames = "cell_type", key = "cell_type") %>% copy()
dt2 <- data.table(DAT@meta.data[c("cluster_ext_type_filt","Cluster")], keep.rownames = "Cell", key = "Cell") %>% copy()
row.names(dt2) <- dt2$Cell
markerDT <- markerData[dt2]
ggplot(data = markerDT, aes(x=CD14, y=FCGR3A) ) +
stat_density_2d(aes(fill = ..level..), geom = "polygon", colour="purple", bins = 100, size=.1) +
geom_point(aes(color=as.factor(cluster_ext_type_filt)), shape=16, stroke=0, size=2, alpha=.5) +
guides(colour = guide_legend(title="cluster_ext_type_filt",override.aes = list(alpha = 1))) +
scale_color_manual(values = pretty_colors(mDAT, var = "cluster_ext_type_filt"))## Warning: Computation failed in `stat_density2d()`:
## bandwidths must be strictly positive
Need to get tSNE from Monocle into Seurat somehow.
FeaturePlot(DAT,features.plot = c("CD14","FCGR3A"),
cols.use = c("grey","red","blue","purple"),
dark.theme = T, nCol = 2, overlay = T, no.legend = F) FeaturePlot(DAT,features.plot = c("LRRK2", "GBA"),
cols.use = c("grey","purple","cyan","blue"),
dark.theme = T, nCol = 2, overlay = T, no.legend = F) FeaturePlot(DAT, features.plot = c("MS4A4A","MS4A6A"),
cols.use = c("grey","red","green","blue"),
dark.theme = T, nCol = 2, overlay = T, no.legend = F)identify_unique_markers <-function(DAT, clusterA, clusterB, allGenes=F){
DAT <- SetIdent(DAT, ident.use = DAT@meta.data$Cluster)
if(allGenes==F){
clustA.markers <- FindMarkers(DAT, ident.1=clusterA, min.pct = 0.25,
only.pos = F, test.use = "wilcox")
clustB.markers <- FindMarkers(DAT, ident.1=clusterB, min.pct = 0.25,
only.pos = F, test.use = "wilcox")
}else{
clustA.markers <- FindMarkers(DAT, ident.1=clusterA, min.pct = 0.25,
only.pos = F, test.use = "wilcox",
logfc.threshold = 0, min.pct = 0, min.cells.group = 1,
min.cells.gene = 1, min.diff.pct = -Inf)
clustB.markers <- FindMarkers(DAT, ident.1=clusterB, min.pct = 0.25,
only.pos = F, test.use = "wilcox",
logfc.threshold = 0, min.pct = 0, min.cells.group = 1,
min.cells.gene = 1, min.diff.pct = -Inf)
}
clustA.uniqueMarkers <- clustA.markers[!(row.names(clustA.markers) %in% row.names(clustB.markers)),] %>%
subset(p_val_adj<=0.05)
clustB.uniqueMarkers <- clustB.markers[!(row.names(clustB.markers) %in% row.names(clustA.markers)),] %>%
subset(p_val_adj<=0.05)
difference <- abs( length(row.names(clustA.uniqueMarkers)) - length(row.names(clustB.uniqueMarkers) ) )
uniqueMarkers <- data.frame(Cluster0_markers=c(row.names(clustA.uniqueMarkers), rep("",difference) ),
Cluster1_markers=row.names(clustB.uniqueMarkers))
write.csv(uniqueMarkers,
file.path(resultsPath,"unique_cluster_markers.csv"),
quote = F, row.names = F)
createDT(uniqueMarkers, "Unique/Mutually Exclusive Markers of Cluster 0 and Cluster 1")
return(uniqueMarkers)
}
uniqueMarkers <- identify_unique_markers(clustDAT, clusterA = 1, clusterB = 2, allGenes=allGenes)# clustDAT@meta.data$dx %>% unique()
# FDR <- 0.05/dim(DEGs)[1]
# subset(DEGs, p_val<FDR)
DEGs <- runDGE(clustDAT, meta_var = "dx", group1 = "PD", group2 = "control",
allGenes = F) # clustDAT@meta.data$dx %>% unique()
DEGs <- runDGE(clustDAT, meta_var = "mut", group1 = "PD", group2 = "GBA",
allGenes = F)## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_text_repel).
DGE_within_clusters(clustDAT, meta_var = "dx", group1 = "PD", group2 = "control", clusterList = c(0,1),
allGenes = F)## Error in WhichCells(object = object, ident = ident.use, ident.remove = ident.remove, : Identity : 0 not found.
DGE_within_clusters(DAT, meta_var = "mut", group1 = "PD", group2 = "GBA", clusterList = c(0,1),
allGenes = F)## Error in WhichCells(object = object, ident = ident.use, ident.remove = ident.remove, : Identity : 0 not found.
enrichr_dbs <- c("KEGG_2018", "Reactome_2016",
"GO_Biological_Process_2018", "GO_Molecular_Function_2018", "GO_Cellular_Component_2018",
"Rare_Diseases_AutoRIF_ARCHS4_Predictions", "Human_Gene_Atlas")
# createDT(enrichR::listEnrichrDbs(), "Enrichr Databases")
geneList <- data.frame(Gene=row.names(DEGs_monocytes),
Weight=scales::rescale(length(DEGs_monocytes$p_val_adj):1))
results <- enrichr(genes = geneList, databases = enrichr_dbs ) Uploading data to Enrichr… Done. Querying KEGG_2018… Done. Querying Reactome_2016… Done. Querying GO_Biological_Process_2018… Done. Querying GO_Molecular_Function_2018… Done. Querying GO_Cellular_Component_2018… Done. Querying Rare_Diseases_AutoRIF_ARCHS4_Predictions… Done. Querying Human_Gene_Atlas… Done. Parsing results… Done.
for (db in enrichr_dbs){
cat('\n')
cat("##",db,"\n")
# res <- subset(results[[db]], Adjusted.P.value<=0.05)
createDT_html(results[[db]], paste("Enrichr Results:",db))
cat('\n')
}